//==============================================================================
// Project:		Wealth Transfers and their Economic Effects
// File name:	Expected deaths bias regression
// Objective: 	Test whether the inheritances outflow estimate based on expected
//				deaths is biased due to a correlation between wealth and
//				probability of death after controlling for observed variables.
//
// Created: 	07/10/2021
// Modified: 	16/11/2021
//==============================================================================

****************
*** Bequests ***
****************

*************************************
*** Loading and cleaning the data ***
*************************************

clear all

// Working directory: replace the placeholder below with the folder holding the data.
cd "XXXX"

// Load the dataset from the working directory.
use HILDA_restricted_combined_rescaled

// Keep responders only: observations with hgint equal to 0 (non-responders) are removed.
keep if hgint != 0

******************************
*** Creating key variables ***
******************************

// Wealth: pwassei minus pwdebti (presumably assets less debts - confirm against the data dictionary).
gen wealth = pwassei - pwdebti

// Log of wealth. log() returns missing for zero or negative arguments, so
// observations with non-positive wealth drop out of every log_wealth regression.
gen log_wealth = log(wealth)

codebook mrcurr	// checking the marital status variable

// Married indicator: 1 when mrcurr is 1 or 3, 0 for every other known code,
// missing when marital status is unknown.
// NOTE(review): confirm against the codebook output above that codes 1 and 3
// are the categories intended to count as married.
gen marital_status = 0
replace marital_status = 1 if mrcurr == 1 | mrcurr == 3
// Unknown marital status must not be counted as "not married". Negative codes
// are set to missing, and system-missing values are tested explicitly because
// Stata orders missing above every number, so "mrcurr < 0" alone is false for them.
replace marital_status = . if mrcurr < 0 | missing(mrcurr)

// Died indicator: 1 when the year of death is the survey year (wave + 2000)
// or the following year (wave + 2001), 0 otherwise.
gen died = 0
replace died = 1 if inlist(yodeath, wave + 2000, wave + 2001)

codebook sex	// checking the gender variable

// Five year age bands with breaks at 0, 5, ..., 100. egen's cut() returns missing
// for values at or above the last break, so ages of 100 and over get no band and
// are excluded from the age_band regressions.
egen age_band = cut(hgage), at(0(5)100)

***************************
*** Regression analysis ***
***************************

// Primary regressions: wealth on sex-by-single-year-of-age cells plus the died
// indicator, estimated separately by marital status. All models use hhwtrps as
// probability weights and cluster standard errors on xwaveid.

// marital_status == 0: coefficient on died is -71643.6, 95% CI is [-179086.6   35799.42]
regress wealth i.sex#i.hgage died [pweight = hhwtrps] ///
	if marital_status == 0, vce(cluster xwaveid)

// marital_status == 1: coefficient on died is -216239.1, 95% CI is [-288125.8   -144352.5]
regress wealth i.sex#i.hgage died [pweight = hhwtrps] ///
	if marital_status == 1, vce(cluster xwaveid)

// Secondary regressions: same as the primary ones, with age in five year bands.

// marital_status == 0: coefficient on died is -61764.76, 95% CI is [-165779.2   42249.68]
regress wealth i.sex#i.age_band died [pweight = hhwtrps] ///
	if marital_status == 0, vce(cluster xwaveid)

// marital_status == 1: coefficient on died is -210561.7, 95% CI is [-283913   -137209.9]
regress wealth i.sex#i.age_band died [pweight = hhwtrps] ///
	if marital_status == 1, vce(cluster xwaveid)

// Alternate regressions: sex-by-age-by-marital-status cells, in levels and in logs.

// Levels, waves 2, 6, 10, 14 and 18: coefficient on died is -154308.2, 95% CI is [-216913.7   -91702.71]
regress wealth i.sex#i.hgage#i.marital_status died [pweight = hhwtrps] ///
	if inlist(wave, 2, 6, 10, 14, 18), vce(cluster xwaveid)

// Logs, waves 2, 6, 10, 14 and 18: coefficient on died is -.5389178, 95% CI is [-.7518268   -.3260088]
regress log_wealth i.sex#i.hgage#i.marital_status died [pweight = hhwtrps] ///
	if inlist(wave, 2, 6, 10, 14, 18), vce(cluster xwaveid)

// Logs, marital_status == 0 only: coefficient on died is -.4358491, 95% CI is [-.7635652   -.1081331]
regress log_wealth i.sex#i.hgage died [pweight = hhwtrps] ///
	if marital_status == 0, vce(cluster xwaveid)

// Fully interacted model: died enters the sex-by-age-by-marital-status cells
// rather than as a single regressor (no results recorded for this model).
regress wealth i.sex#i.hgage#i.marital_status#died [pweight = hhwtrps], ///
	vce(cluster xwaveid)